/* c_gen_pscores - ********************************************************************

Cleans assignment results. Generates pscores for assignment to any preschool. Merges in
geographic data.

***************************************************************************************/
set more off

// Years and grade levels to process. One pscores file is written for each
// (year, grade) pair.
local year "1997 1998 1999 2000 2001 2002 2003"
local grade "K0 K1"

foreach y of local year {
	foreach g of local grade {

		// ------------------------------------------------------------------
		// 1. Student-by-school records for this year and grade
		// ------------------------------------------------------------------
		use "$stata_data_assignment/`y'/StudentIDinfo`g'_modified.dta", clear

		// roffer = 1 on rows whose (ch, pch) equals (sep, sepprog).
		// NOTE(review): presumably sep and sepprog hold the realized offer - confirm.
		gen roffer = sep==string(ch) & sepprog==pch

		keep studentno ch pch wch roffer geocode pref random bilingual num_ranked first_walk East

		gen year = `y'
		gen pid = string(ch) + pch + "00" if !missing(ch,pch)
		gen walk = inlist(wch,"W")
		tab walk
		// schoolcode starts as a copy of ch and is recoded below; ch itself is left untouched.
		clonevar schoolcode = ch
		destring geocode, replace
		// String version of ch, used as the merge key for the school coding file
		// and later for the assignment probability file.
		gen sch = string(ch)

		// NOTE(review): ELC and nLC used further down presumably come from this file - confirm.
		merge m:1 sch using "$stata_data/school_coding.dta", nogen keep(master match)
		gen DateOfBirth = .a

		// Recode school codes before merging on the distance matrix, which is
		// keyed on (geocode, schoolcode).
		replace schoolcode = 1053 if schoolcode==4010		// agassiz became margarita muniz
		replace schoolcode = 2360 if schoolcode==4054		// east boston ELC
		replace schoolcode = 4345 if schoolcode==4340		// marshall became UP academy dorchester.
		replace schoolcode = 1171 if schoolcode==4170
		replace schoolcode = 1230 if schoolcode==4190
		replace schoolcode = 4621 if schoolcode==4221
		replace schoolcode = 4272 if schoolcode==4271
		replace schoolcode = 1340 if schoolcode==4371
		// NOTE(review): the next line can never change anything. Code 4170 was
		// already recoded to 1171 four lines above, so no row still equals 4170
		// here. The source code is probably a typo - confirm the intended code.
		replace schoolcode = 4291 if schoolcode==4170
		replace schoolcode = 4123 if schoolcode==4120
		replace schoolcode = 1440  if schoolcode==4571

		merge m:1 geocode schoolcode using "$raw_data_bps/Pre-K/geodistmat.dta", gen(_mdistmat) keep(master matched)

		// ------------------------------------------------------------------
		// 2. Distance-based features
		// ------------------------------------------------------------------
		// Percentiles of distance are taken over rows with roffer==1 only.
		sum distance if roffer==1, detail

		// dpXX = 1 when distance is above the XXth percentile computed above.
		// The loop relies on the r() results of the summarize call still being
		// in memory, so no r-class command may be inserted inside it.
		foreach p in 25 50 75 90 {
			gen dp`p' = distance>r(p`p') & !missing(distance)
		}

		// Within each geocode, the school on the row with the smallest distance
		// (missing distances sort last).
		bys geocode (distance) : gen closestschoolcode = schoolcode[1]
		gen closest = closestschoolcode==schoolcode

		// Row with the smallest distance among each student's own rows.
		bys studentno (distance) : gen closestinlist = _n==1

		// Row with the largest non-missing distance among each student's rows;
		// left missing on rows whose distance is missing.
		gen missingdistance = missing(distance)
		bys studentno missingdistance (distance) : gen farinlist = _n==_N if missingdistance==0

		gen any = 1

		// Keep one row per student-school pair.
		duplicates drop studentno sch, force

		// Renumber preferences 1, 2, ... within student, in order of pref.
		// The full variable name is used here: the abbreviation student only
		// works while varabbrev is on and no other variable starts with student.
		bys studentno (pref) : replace pref = _n

		gen first = pref==1

		tempfile feature
		save `feature'

		// ------------------------------------------------------------------
		// 3. Assignment probabilities, collapsed to student-school level
		// ------------------------------------------------------------------
		use "$stata_data_assignment/`y'/asnprob`g'.dta", clear
		// The first four characters of pid are used as the school key.
		gen sch = substr(pid,1,4)
		collapse (sum) asnprob (min) pref (max) soffer, by(sch studentno)
		// Only rows present in the probability file are kept. Rows without a
		// match in the feature file have missing feature values.
		merge 1:1 sch studentno using `feature', gen(_mfeature) keep(master matched)

		// One indicator per school key observed in the merged data.
		levelsof sch
		foreach sch in `r(levels)' {
			gen sch_`sch' = sch == "`sch'"
		}

		local treatlist "dp25 dp50 dp75 dp90 closest walk closestinlist farinlist any first ELC nLC sch_*"

		// For every feature x, per student:
		//   pformx = sum over rows of x times asnprob
		//   roffx  = max over rows of roffer times x
		//   soffx  = max over rows of soffer times x
		foreach x of varlist `treatlist' {
			bys studentno : egen pform`x' = sum(`x'*asnprob)
			bys studentno : egen roff`x' = max(roffer*`x')
			bys studentno : egen soff`x' = max(soffer*`x')
		}

		// Reduce to one row per student.
		// NOTE(review): which row survives is not controlled here, so row-level
		// variables kept below (sch, random, geocode, ...) come from whichever
		// row happens to be first and may be missing if that row had no match
		// in the feature file - confirm this is acceptable.
		duplicates drop studentno, force

		// pfreqx = mean of soffx across students sharing the same pformx value.
		foreach x of varlist `treatlist' {
			bys pform`x' : egen pfreq`x' = mean(soff`x')
		}

		rename sch schsim

		rename geocode asngeo

		keep studentno pform* pfreq* roff* soff* schsim random asngeo DateOfBirth bilingual East num_ranked first_walk

		save "$stata_data_assignment/`y'/pscores`g'.dta", replace

	}
}
